# Base image: Ubuntu 20.04, referenced by an explicit version tag.
FROM  ubuntu:20.04

# Image metadata: name and e-mail address of the maintainer.
LABEL maintainer="FredyVia <fredyvia@qq.com>"

# Download coordinates for the Spark distribution. The RUN step below joins
# them into the tarball URL and file name. Because these are ENV (not ARG)
# values, they are also present in the environment of running containers.
ENV BASE_URL="http://mirrors.tencentyun.com/apache/spark/" \
    SPARK_VERSION="3.3.1" \
    HADOOP_VERSION="3"

# Install the runtime dependencies and unpack the Spark distribution, all in
# one layer so that temporary files never reach the image:
#   1. Point apt at the Tencent Cloud mirror (archive and security sources).
#   2. Install a Java 8 runtime, Python 3 with pip, and wget, then remove the
#      apt package cache and index lists.
#   3. Download the Spark tarball selected by BASE_URL / SPARK_VERSION /
#      HADOOP_VERSION, unpack it, rename the directory to "spark" and delete
#      the tarball.
# No WORKDIR is set in this file, so the "spark" directory is created in the
# build's working directory (presumably /, i.e. /spark -- confirm).
# NOTE(review): the tarball is fetched over plain HTTP with no integrity
# check; consider verifying it against the published .sha512 file.
RUN   sed -i "s@http://.*archive.ubuntu.com@http://mirrors.tencentyun.com@g" /etc/apt/sources.list \
      && sed -i "s@http://.*security.ubuntu.com@http://mirrors.tencentyun.com@g" /etc/apt/sources.list \
      && apt-get update \
      # apt-get is used instead of apt: apt's command-line interface is not
      # intended for scripted use and prints a warning when run from a script.
      && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
            openjdk-8-jre \
            python3 \
            python3-pip \
            wget \
      && apt-get clean \
      && rm -rf /var/lib/apt/lists/* \
      # BASE_URL is declared with a trailing slash; ${BASE_URL%/} strips it
      # (if present) so the final URL does not contain a doubled "/".
      && wget "${BASE_URL%/}/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
      && tar -xf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
      && mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark \
      && rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"

# RUN   pip3 install --no-cache-dir  -i http://mirrors.tencentyun.com/pypi/simple/ --trusted-host mirrors.tencentyun.com numpy
      
# Pin PYTHONHASHSEED to a fixed value.
# Python 3.3 and later randomize str/bytes hashing per process by default;
# a fixed seed makes hash() return the same values in every Python process.
ENV PYTHONHASHSEED=1
